Skip to content

Commuting Patterns

Using meaningful locations queries

In this worked example we demonstrate the use of FlowKit to investigate commuting patterns. We will use meaningful_locations_aggregate queries to calculate subscribers' home and work locations, following methods developed by Isaacman et al. and Zagatti et al.

The Jupyter notebook for this worked example can be downloaded here, or can be run using the quick start setup.

Load FlowClient and connect to FlowAPI

We start by importing FlowClient. We also import geopandas and mapboxgl, which we will use later to visualise the data.

Mapbox requires an access token, which should be set as the environment variable MAPBOX_ACCESS_TOKEN.

import flowclient
import os
import numpy as np
import geopandas as gpd
import mapboxgl
from mapboxgl.utils import create_color_stops

# Read the Mapbox access token from the environment. The lookup raises
# KeyError if MAPBOX_ACCESS_TOKEN has not been set.
mapbox_token = os.environ["MAPBOX_ACCESS_TOKEN"]

We must next generate an API access token using FlowAuth, and paste the token here as TOKEN. Once we have a token, we can start a connection to the FlowAPI system.

# Address of the FlowAPI server; a local instance is used when the
# FLOWAPI_URL environment variable is not set.
flowapi_url = os.getenv("FLOWAPI_URL", "http://localhost:9090")

# TOKEN must already hold the API access token generated with FlowAuth.
conn = flowclient.connect(url=flowapi_url, token=TOKEN)

Create meaningful locations queries

We assign a day-of-week score of +1 to events which occur on weekdays (Monday-Friday), and a score of -1 to weekends (Saturday, Sunday). We assign an hour-of-day score of +1 to events during "working hours", which we define here as 08:00-17:00, a score of -1 to evening and night hours 19:00-07:00, and a score of 0 to the transitional hours in between (07:00-08:00 and 17:00-19:00). We then define two labels: we label locations with a positive hour-of-day score as "work", and locations with a negative hour-of-day score as "home".

# Day-of-week scores: +1 for every weekday, -1 for both weekend days.
tower_day_of_week_scores = {
    **dict.fromkeys(
        ("monday", "tuesday", "wednesday", "thursday", "friday"), 1
    ),
    **dict.fromkeys(("saturday", "sunday"), -1),
}

# Hour-of-day scores: one entry per hour of the day, starting at midnight.
tower_hour_of_day_scores = (
    [-1] * 7  # 00:00-07:00
    + [0]  # 07:00-08:00
    + [1] * 9  # 08:00-17:00
    + [0] * 2  # 17:00-19:00
    + [-1] * 5  # 19:00-24:00
)


def _score_band(lowest, highest):
    """Return a GeoJSON polygon whose first coordinate runs from ``lowest``
    to ``highest`` and whose second coordinate runs from -1 to 1."""
    corners = [[lowest, 1], [lowest, -1], [highest, -1], [highest, 1]]
    return {"type": "Polygon", "coordinates": [corners]}


# "home" covers the negative side of the first coordinate and "work" the
# non-negative side; both span the full -1..1 range of the second coordinate.
meaningful_locations_labels = {
    "home": _score_band(-1, -1e-06),
    "work": _score_band(0, 1),
}

Having defined our labels, we now pass these to the meaningful_locations_aggregate function to create parameter dictionaries for two meaningful locations queries: a "home location", which will count the number of subscribers with "evening" locations in each level 3 administrative region, and a "work location", which will instead count "daytime" locations.

# Parameters common to both queries; only the requested label differs.
shared_location_params = dict(
    start_date="2016-01-01",
    end_date="2016-01-07",
    labels=meaningful_locations_labels,
    tower_day_of_week_scores=tower_day_of_week_scores,
    tower_hour_of_day_scores=tower_hour_of_day_scores,
    aggregation_unit="admin3",
)

# Query specification counting subscribers' "home" locations per region
home_locations_spec = flowclient.meaningful_locations_aggregate(
    label="home", **shared_location_params
)
# Query specification counting subscribers' "work" locations per region
work_locations_spec = flowclient.meaningful_locations_aggregate(
    label="work", **shared_location_params
)

We pass these parameters to the get_result function, to get the results of the queries as pandas DataFrames.

def _run_query(spec):
    """Run the query specification ``spec`` over the open FlowAPI
    connection and return its result."""
    return flowclient.get_result(connection=conn, query=spec)


home_locations = _run_query(home_locations_spec)

work_locations = _run_query(work_locations_spec)

Visualise the distributions of home/work locations

We use the get_geography function to download the geography for the level 3 administrative regions.

# Download the boundaries of the level 3 administrative regions as GeoJSON
regions = flowclient.get_geography(connection=conn, aggregation_unit="admin3")

# Build a geopandas GeoDataFrame from the GeoJSON features, so the query
# results can be joined to the region geometries
regions_geodataframe = gpd.GeoDataFrame.from_features(regions)

We can now combine the geography data with the results of our meaningful locations queries to create a choropleth map showing the distribution of home/work locations, using the mapboxgl library for visualisation.

# Per-region counts for each label, indexed by region P-code
home_totals = home_locations.drop(columns="label").set_index("pcod")
work_totals = work_locations.drop(columns="label").set_index("pcod")

# Attach the home counts, then the work counts, to the region geometries.
# The second join sees a clashing "total" column, so the suffixes turn the
# pair into "total_home" and "total_work".
region_shapes = regions_geodataframe.drop(columns="centroid")
with_home_totals = region_shapes.join(
    home_totals, on="admin3pcod", how="left"
)
with_both_totals = with_home_totals.join(
    work_totals,
    on="admin3pcod",
    lsuffix="_home",
    rsuffix="_work",
    how="left",
)
# Regions missing from a query result get a count of zero
locations_geodataframe = with_both_totals.fillna(0)

# Human-readable column names for the map labels
map_label_columns = {
    "admin3pcod": "P-code",
    "admin3name": "Name",
    "total_home": "Total (home)",
    "total_work": "Total (work)",
}
locations_geodataframe = locations_geodataframe.rename(
    columns=map_label_columns
)

# Which label to colour the map by: "home" or "work"
locations_to_show = "home"

# Upper limit of the colour scale, shared by both labels
max_total = max(
    home_locations["total"].max(), work_locations["total"].max()
)

# Nine evenly spaced colour stops between zero and the largest count
colour_breaks = np.linspace(0, max_total, 9)
map_display_options = dict(
    opacity=0.8,
    line_color="black",
    line_width=0.5,
    legend_gradient=True,
    legend_layout="horizontal",
    legend_text_numeric_precision=0,
    below_layer="waterway-label",
    center=(84.1, 28.4),
    zoom=5.5,
)
locations_viz = mapboxgl.ChoroplethViz(
    locations_geodataframe.__geo_interface__,
    access_token=mapbox_token,
    color_property=f"Total ({locations_to_show})",
    color_stops=create_color_stops(colour_breaks, colors="YlGn"),
    **map_display_options,
)

locations_viz.show()

Calculate commuter flows

In addition to looking at the distributions of our two meaningful locations separately, we can calculate an origin-destination matrix between the two labels. We call the meaningful_locations_between_label_od_matrix function to create a query specification, and pass this to get_result to get the result of the query.

# Parameters of the home -> work origin-destination matrix query
od_matrix_params = dict(
    start_date="2016-01-01",
    end_date="2016-01-07",
    label_a="home",
    label_b="work",
    labels=meaningful_locations_labels,
    tower_day_of_week_scores=tower_day_of_week_scores,
    tower_hour_of_day_scores=tower_hour_of_day_scores,
    aggregation_unit="admin3",
)
od_matrix_spec = flowclient.meaningful_locations_between_label_od_matrix(
    **od_matrix_params
)

# Run the query and fetch its result
od_matrix = flowclient.get_result(query=od_matrix_spec, connection=conn)

To calculate the number of subscribers who commute into each region from a different region, we first calculate the number of subscribers in each region with both their home and work locations in the same region (which we call commuters_within_region here), and then subtract this from the total flows into each region.

Similarly, we can subtract commuters_within_region from the total flows out of each region to calculate the number of people with home locations in each region who commute to other regions.

# Subscribers whose home and work locations fall in the same region,
# indexed by that region's P-code.
commuters_within_region = (
    od_matrix[od_matrix.pcod_from == od_matrix.pcod_to]
    .drop(columns=["label_from", "label_to", "pcod_from"])
    .set_index("pcod_to")
)

# Total flow into each region, minus those who also live there.
# Only the "total" column is summed: with pandas >= 2.0 a bare
# groupby(...).sum() also concatenates the string columns (P-codes and
# labels), and the subtraction below would then fail with a TypeError.
commuters_into_region = (
    od_matrix.groupby("pcod_to")[["total"]]
    .sum()
    .subtract(commuters_within_region, fill_value=0)
)
# Total flow out of each region, minus those who also work there.
# fill_value=0 keeps regions that have no within-region commuters.
commuters_out_from_region = (
    od_matrix.groupby("pcod_from")[["total"]]
    .sum()
    .subtract(commuters_within_region, fill_value=0)
)

As with the meaningful locations above, we can combine these commuter in/outflows with the geography data to visualise the results on a choropleth map.

# Attach the inbound counts, then the outbound counts, to the region
# geometries. The second join sees a clashing "total" column, so the
# suffixes turn the pair into "total_in" and "total_out".
commuter_region_shapes = regions_geodataframe.drop(columns="centroid")
with_inflows = commuter_region_shapes.join(
    commuters_into_region, on="admin3pcod", how="left"
)
with_both_flows = with_inflows.join(
    commuters_out_from_region,
    on="admin3pcod",
    lsuffix="_in",
    rsuffix="_out",
    how="left",
)
# Regions with no recorded flows get a count of zero
commuters_geodataframe = with_both_flows.fillna(0)

# Human-readable column names for the map labels
commuter_label_columns = {
    "admin3pcod": "P-code",
    "admin3name": "Name",
    "total_in": "Commuters in",
    "total_out": "Commuters out",
}
commuters_geodataframe = commuters_geodataframe.rename(
    columns=commuter_label_columns
)

# Which direction to colour the map by: "in" or "out"
direction_to_show = "in"

# Upper limit of the colour scale; the floor of 1 keeps the scale from
# collapsing to a single point when every flow is zero
max_total = max(
    commuters_into_region["total"].max(),
    commuters_out_from_region["total"].max(),
    1,
)

# Nine evenly spaced colour stops between zero and the upper limit
commuter_colour_breaks = np.linspace(0, max_total, 9)
commuter_map_options = dict(
    opacity=0.8,
    line_color="black",
    line_width=0.5,
    legend_gradient=True,
    legend_layout="horizontal",
    legend_text_numeric_precision=0,
    below_layer="waterway-label",
    center=(84.1, 28.4),
    zoom=5.5,
)
commuters_viz = mapboxgl.ChoroplethViz(
    commuters_geodataframe.__geo_interface__,
    access_token=mapbox_token,
    color_property=f"Commuters {direction_to_show}",
    color_stops=create_color_stops(commuter_colour_breaks, colors="YlGn"),
    **commuter_map_options,
)

commuters_viz.show()